Challenge 9

Author

Harshini Karthikeyan

Published

December 4, 2022

1 Data and Set Up

Show code
library(tidyverse)
library(dplyr)
library(kableExtra)
library(DT)
# Load the state-level baby-name records (names beginning with "A") from the
# project-relative path so the chunk works regardless of the working directory.
a_names_data <- here::here(
  "supporting_artifacts",
  "Labs",
  "Lab_9_Final",
  "StateNames_A.csv"
) |>
  read_csv()

1.1 Data

Show code
# source: https://rstudio.github.io/DT/
# Interactive preview of the raw data; `filter = "top"` asks DT to place the
# column filters above the table. Cells are escaped and not editable.
a_names_data |>
  datatable(
    class = "display",
    callback = JS("return table;"),
    filter = "top",
    escape = TRUE,
    style = "auto",
    selection = "multiple",
    extensions = list(),
    editable = FALSE
  )

2 Plot

Removed the major grid lines, chose the line color and fonts, repositioned the plot title, and added a subtitle.

Show code
# Yearly total of female babies named Allison, summed across all states.
# The column is named `F` because the plotting code maps `y = F`.
# Missing counts are skipped (equivalent to treating them as 0), and the
# result is returned ungrouped: one row per year, so no de-duplication or
# reshaping is needed.
allison_f <- a_names_data |>
  filter(Name == "Allison", Gender == "F") |>
  group_by(Year) |>
  summarise(F = sum(Count, na.rm = TRUE), .groups = "drop")

# Line chart of the yearly counts in `allison_f`.
# Axis titles are blanked because the plot title and subtitle carry the
# description; the major grid lines are dropped in favor of light axis lines.
allison_f |>
  ggplot(mapping = aes(x = Year, y = F)) +
  geom_line(color = "darkgreen") +
  scale_x_continuous(breaks = seq(1997, 2015, 2)) +
  scale_y_continuous(breaks = seq(5000, 8000, 500)) +
  labs(
    title = "Number of babies named Allison over Time",
    subtitle = "in the US",
    x = " ",
    y = " "
  ) +
  theme(
    plot.title.position = "plot",
    plot.title = element_text(
      vjust = 1,
      face = "italic",
      family = "serif"
    ),
    plot.subtitle = element_text(
      family = "serif",
      face = "italic"
    ),
    axis.line = element_line(
      color = "lightgray",
      linewidth = 1,
      linetype = "solid"
    ),
    panel.grid.major = element_blank()
  )

3 Allison Table

Show code
# Per-state totals of babies named Allison, split by recorded sex.
# Rows are filtered to the one name first, then each sex is summed directly
# from `Count`; a state with no records for a sex gets 0 because the sum of
# an empty selection is 0. Missing counts are skipped. The result is
# ungrouped with columns State, F, M.
x <- a_names_data |>
  filter(Name == "Allison") |>
  group_by(State) |>
  summarise(
    F = sum(Count[Gender == "F"], na.rm = TRUE),
    M = sum(Count[Gender == "M"], na.rm = TRUE),
    .groups = "drop"
  )

# One alignment letter per column (State left, both counts centered).
knitr::kable(
  x,
  format = "pipe",
  col.names = c(
    "State",
    "Female born babies named Allison",
    "Male born babies named Allison"
  ),
  align = "lcc"
)
State Female born babies named Allison Male born babies named Allison
AK 232 0
AL 1535 0
AR 1198 0
AZ 1880 0
CA 12413 0
CO 1594 0
CT 1099 0
DC 321 0
DE 294 0
FL 4455 0
GA 3257 0
HI 183 0
IA 1477 0
ID 451 0
IL 5110 0
IN 3067 0
KS 1283 0
KY 1905 20
LA 1209 0
MA 2218 0
MD 2229 0
ME 340 0
MI 4014 0
MN 2374 0
MO 2882 0
MS 817 0
MT 226 0
NC 3435 0
ND 285 0
NE 807 0
NH 412 0
NJ 3052 0
NM 399 0
NV 729 0
NY 5747 0
OH 5487 0
OK 1421 0
OR 1186 0
PA 4307 0
RI 306 0
SC 1228 0
SD 376 0
TN 2488 0
TX 10192 0
UT 1125 0
VA 3220 0
VT 135 0
WA 1956 0
WI 2367 0
WV 813 0
WY 142 0

Filter to female babies only and create a new data set, used for the plot above and the table below.

Show code
# Yearly total of female babies named Allison, summed across all states.
# Missing counts are skipped (equivalent to treating them as 0), and the
# result is returned ungrouped: one row per year, so no de-duplication or
# reshaping is needed.
allison_f <- a_names_data |>
  filter(Name == "Allison", Gender == "F") |>
  group_by(Year) |>
  summarise(F = sum(Count, na.rm = TRUE), .groups = "drop")

knitr::kable(
  allison_f,
  format = "pipe",
  col.names = c("Year", "Number of Babies named Allison"),
  align = "lc"
)
Year Number of Babies named Allison
1997 7274
1998 7861
1999 7023
2000 6314
2001 6209
2002 6237
2003 5850
2004 5871
2005 5631
2006 5560
2007 5450
2008 6237
2009 6579
2010 5856
2011 5453
2012 5411
2013 5422
2014 5440

3.1 Spelling by State

Show code
# Keep every record whose name is one of the three spellings.
# `%in%` tests set membership row by row. Comparing with `==` against a
# length-3 vector would instead recycle the vector down the rows and keep
# only the rows that happen to line up with the matching position, silently
# discarding most of the data.
al <- a_names_data |>
  filter(Name %in% c("Allan", "Alan", "Allen"))

# Total count per year for each spelling, summed over every state and sex
# present in `al`. The result is returned ungrouped so that no row-wise
# grouping leaks into later steps.
al_year <- al |>
  group_by(Year, Name) |>
  summarise(Count = sum(Count), .groups = "drop")

# Striped HTML table rendered in Arial.
kable(
  al_year,
  format = "html",
  col.names = c("Year", "Name", "Number of babies named as such"),
  align = "lcl"
) |>
  kable_styling("striped") |>
  kable_classic(html_font = "Arial")
Year Name Number of babies named as such
1997 Alan 311
1997 Allan 251
1997 Allen 215
1998 Alan 384
1998 Allan 85
1998 Allen 603
1999 Alan 311
1999 Allan 69
1999 Allen 239
2000 Alan 1054
2000 Allan 84
2000 Allen 193
2001 Alan 752
2001 Allan 25
2001 Allen 477
2002 Alan 1127
2002 Allan 180
2002 Allen 217
2003 Alan 818
2003 Allan 75
2003 Allen 547
2004 Alan 323
2004 Allan 281
2004 Allen 379
2005 Alan 564
2005 Allan 37
2005 Allen 225
2006 Alan 538
2006 Allan 244
2006 Allen 356
2007 Alan 1172
2007 Allan 108
2007 Allen 502
2008 Alan 681
2008 Allan 294
2008 Allen 417
2009 Alan 1334
2009 Allan 79
2009 Allen 354
2010 Alan 1483
2010 Allan 91
2010 Allen 424
2011 Alan 1088
2011 Allan 187
2011 Allen 521
2012 Alan 431
2012 Allan 80
2012 Allen 244
2013 Alan 509
2013 Allan 105
2013 Allen 159
2014 Alan 476
2014 Allan 13
2014 Allen 433
Show code
# Male babies born in 2000 in California and Pennsylvania, counted per
# spelling (one row per state, one column per spelling).
# Rows are narrowed to the year, states and sex of interest first, so the
# whole data set never has to be reshaped into one column per name. Each
# spelling is then summed directly; a spelling with no records in a state
# yields 0, and missing counts are skipped. The result is ungrouped.
al_state <- a_names_data |>
  filter(
    Year == 2000,
    State %in% c("PA", "CA"),
    Gender == "M"
  ) |>
  group_by(State) |>
  summarise(
    Allan = sum(Count[Name == "Allan"], na.rm = TRUE),
    Alan = sum(Count[Name == "Alan"], na.rm = TRUE),
    Allen = sum(Count[Name == "Allen"], na.rm = TRUE),
    .groups = "drop"
  )

# One alignment letter per column (four columns, all left-aligned).
knitr::kable(
  al_state,
  format = "pipe",
  col.names = c(
    "State",
    "Babies named Allan",
    "Babies named Alan",
    "Babies named Allen"
  ),
  align = "llll"
)
State Babies named Allan Babies named Alan Babies named Allen
CA 131 579 176
PA 12 51 56
Show code
# Express each spelling as a share of the state's three-spelling total.
# `Tot` is computed first, then every listed column (including `Tot`
# itself, which therefore becomes 1) is divided by it, rounded to three
# decimals and formatted as text with exactly three decimal places.
al_state_prop <- al_state |>
  mutate(Tot = Allan + Alan + Allen) |>
  mutate(
    across(
      .cols = c(Allan, Alan, Allen, Tot),
      .fns = \(n) format(round(n / Tot, 3), nsmall = 3)
    )
  )

knitr::kable(
  al_state_prop,
  format = "pipe",
  col.names = c(
    "State",
    "Proportion of Babies born named Allan",
    "Proportion of Babies born, named Alan",
    "Proportion of Babies born, named Allen",
    "Total"
  )
)
State Proportion of Babies born named Allan Proportion of Babies born, named Alan Proportion of Babies born, named Allen Total
CA 0.148 0.653 0.199 1.000
PA 0.101 0.429 0.471 1.000